500e15dda4bc0fe7a698aba310a16438b6894a72,tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java,ExtractComparer,compareFiles,#EvalFilePaths#EvalFilePaths#,222
Before Change
/**
 * Compares the extracts of one source file from two sides, A and B.
 * Loads a metadata list from each side's extract file, writes one row to the
 * COMPARISON_CONTAINERS table, writes an error row for each side whose metadata
 * list is null, and then walks the A metadata list item by item.
 *
 * @param fpsA relative source path and extract file for side A
 * @param fpsB relative source path and extract file for side B
 * @throws IOException declared by the signature; which call raises it is not
 *         visible in this excerpt
 */
protected void compareFiles(EvalFilePaths fpsA, EvalFilePaths fpsB) throws IOException {
//load the metadata list for each side from its extract file
List<Metadata> metadataListA =
extractReader.loadExtract(fpsA.getExtractFile(), alterExtractList);
List<Metadata> metadataListB =
extractReader.loadExtract(fpsB.getExtractFile(), alterExtractList);
//array indices for those metadata items handled in
//"that" (presumably list B, going by the variable name)
Set<Integer> handledB = new HashSet<>();
//the container id is the next value of the CONTAINER_ID counter
String containerID = Integer.toString(CONTAINER_ID.getAndIncrement());
//container table
Map<Cols, String> contData = new HashMap<>();
contData.put(Cols.CONTAINER_ID, containerID);
//the file path and extension columns are both taken from side A's relative source path
contData.put(Cols.FILE_PATH, fpsA.getRelativeSourceFilePath().toString());
long srcFileLength = getSourceFileLength(metadataListA, metadataListB);
//length columns are written as "" unless the value exceeds NON_EXISTENT_FILE_LENGTH
contData.put(Cols.LENGTH,
srcFileLength > NON_EXISTENT_FILE_LENGTH ?
Long.toString(srcFileLength) : "");
contData.put(Cols.FILE_EXTENSION,
FilenameUtils.getExtension(fpsA.getRelativeSourceFilePath().getFileName().toString()));
long extractFileLengthA = getFileLength(fpsA.getExtractFile());
contData.put(Cols.EXTRACT_FILE_LENGTH_A, extractFileLengthA > NON_EXISTENT_FILE_LENGTH ?
Long.toString(extractFileLengthA) : "");
//NOTE(review): this reads fpsA's extract file although the value is stored in
//EXTRACT_FILE_LENGTH_B -- looks like it should be fpsB.getExtractFile(); confirm
long extractFileLengthB = getFileLength(fpsA.getExtractFile());
contData.put(Cols.EXTRACT_FILE_LENGTH_B, extractFileLengthB > NON_EXISTENT_FILE_LENGTH ?
Long.toString(extractFileLengthB) : "");
//the container row is written before the null checks below, so it is
//written even when both metadata lists are null
writer.writeRow(COMPARISON_CONTAINERS, contData);
//a null metadata list is reported to that side's error table
if (metadataListA == null) {
writeError(ERROR_TABLE_A, containerID, fpsA.getRelativeSourceFilePath().toString(),
fpsA.getExtractFile());
}
if (metadataListB == null) {
writeError(ERROR_TABLE_B, containerID, fpsB.getRelativeSourceFilePath().toString(),
fpsB.getExtractFile());
}
//nothing to compare when neither side produced a metadata list
if (metadataListA == null && metadataListB == null) {
return;
}
//NOTE(review): exactly one of the two lists may still be null at this point,
//so countAttachments has to accept a null argument -- confirm
List<Integer> numAttachmentsA = countAttachments(metadataListA);
List<Integer> numAttachmentsB = countAttachments(metadataListB);
//now get that metadata
if (metadataListA != null) {
for (int i = 0; i < metadataListA.size(); i++) {
//every A item gets the next value of the ID counter as its file id
String fileId = Integer.toString(ID.getAndIncrement());
Metadata metadataA = metadataListA.get(i);
Metadata metadataB = null;
//TODO: shouldn't be fileA!!!!
After Change
//excerpt from the body of compareFiles; extractExceptionA and extractExceptionB
//are declared outside the lines shown here
List<Metadata> metadataListA = null;
//side A is only loaded when no extract exception has been recorded for it yet
if (extractExceptionA == null) {
try {
metadataListA = extractReader.loadExtract(fpsA.getExtractFile(),
alterExtractList, minExtractLength, maxExtractLength);
} catch (ExtractReaderException e) {
//keep the exception type instead of propagating; the list stays null
extractExceptionA = e.getType();
}
}
List<Metadata> metadataListB = null;
//side B is always attempted; a failure leaves the list null and records the type
try {
metadataListB = extractReader.loadExtract(fpsB.getExtractFile(),
alterExtractList, minExtractLength, maxExtractLength);
} catch (ExtractReaderException e) {
extractExceptionB = e.getType();
}
//array indices for those metadata items handled in B
Set<Integer> handledB = new HashSet<>();
//the container id is drawn from the ID counter, the same counter that
//supplies file ids in the loop below
String containerID = Integer.toString(ID.getAndIncrement());
//container table
Map<Cols, String> contData = new HashMap<>();
contData.put(Cols.CONTAINER_ID, containerID);
//the file path and extension columns are both taken from side A's relative source path
contData.put(Cols.FILE_PATH, fpsA.getRelativeSourceFilePath().toString());
long srcFileLength = getSourceFileLength(metadataListA, metadataListB);
//length columns are written as "" unless the value exceeds NON_EXISTENT_FILE_LENGTH
contData.put(Cols.LENGTH,
srcFileLength > NON_EXISTENT_FILE_LENGTH ?
Long.toString(srcFileLength) : "");
contData.put(Cols.FILE_EXTENSION,
FilenameUtils.getExtension(fpsA.getRelativeSourceFilePath().getFileName().toString()));
long extractFileLengthA = getFileLength(fpsA.getExtractFile());
contData.put(Cols.EXTRACT_FILE_LENGTH_A, extractFileLengthA > NON_EXISTENT_FILE_LENGTH ?
Long.toString(extractFileLengthA) : "");
//the B length is read from fpsB's extract file
long extractFileLengthB = getFileLength(fpsB.getExtractFile());
contData.put(Cols.EXTRACT_FILE_LENGTH_B, extractFileLengthB > NON_EXISTENT_FILE_LENGTH ?
Long.toString(extractFileLengthB) : "");
//the container row is written before the checks below, so it is written
//even when both metadata lists are null
writer.writeRow(COMPARISON_CONTAINERS, contData);
//a recorded extract exception is written to that side's exception table
if (extractExceptionA != null) {
writeExtractException(EXTRACT_EXCEPTION_TABLE_A, containerID, fpsA.getRelativeSourceFilePath().toString(),
extractExceptionA);
}
if (extractExceptionB != null) {
writeExtractException(EXTRACT_EXCEPTION_TABLE_B, containerID, fpsB.getRelativeSourceFilePath().toString(),
extractExceptionB);
}
//nothing to compare when neither side produced a metadata list
if (metadataListA == null && metadataListB == null) {
return;
}
//NOTE(review): exactly one of the two lists may still be null at this point,
//so countAttachments has to accept a null argument -- confirm
List<Integer> numAttachmentsA = countAttachments(metadataListA);
List<Integer> numAttachmentsB = countAttachments(metadataListB);
//now get that metadata
if (metadataListA != null) {
for (int i = 0; i < metadataListA.size(); i++) {
//the first file should have the same id as the container id;
//every later item takes the next value of the ID counter
String fileId = (i == 0) ? containerID : Integer.toString(ID.getAndIncrement());
Metadata metadataA = metadataListA.get(i);
Metadata metadataB = null;
//TODO: shouldn't be fileA!!!!